We’re curious to see if there is a relationship between longest leaf length to total shoot biomass. Data available: In 2017 21 sites were surveyed for seagrass. At each site, 8 0.25m^2 quadrats were surveyed and 5 entire shoots (i.e. multiple leaves) were removed and brought back to the lab to be measured (in cm). Each shoot (n = 840) was weighed to get total fresh and dry biomass (in grams) and each leaf on the shoot had its length (cm) and width (cm) measured.

We are interested in the length of the longest leaf and the dry biomass. # Data import

eelgrass <- read.csv("..//ALL_DATA/seagrass_biometrics_CLEAN.csv")

Libraries

## ── Attaching packages ────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1     ✔ purrr   0.3.2
## ✔ tibble  2.1.3     ✔ dplyr   0.8.3
## ✔ tidyr   1.0.0     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select

Data Cleaning

# extract what we're interested in 
str(eelgrass)
## 'data.frame':    840 obs. of  71 variables:
##  $ site                  : Factor w/ 21 levels "2017_H_01","2017_H_02",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ collection_date       : Factor w/ 18 levels "20/8/17","21/8/17",..: 18 18 18 18 18 18 18 18 18 18 ...
##  $ YYYYMMDD              : int  20170429 20170429 20170429 20170429 20170429 20170429 20170429 20170429 20170429 20170429 ...
##  $ quadrat               : int  1 1 1 1 1 2 2 2 2 2 ...
##  $ plant                 : int  1 2 3 4 5 1 2 3 4 5 ...
##  $ rhi_length            : num  5 5 5 4.6 5 4.6 4 5 5 3.6 ...
##  $ node1                 : num  0.5 0.7 0.3 0.4 0.2 1.2 0.3 0.3 0.5 0.4 ...
##  $ node2                 : num  0.4 0.4 0.3 0.4 0.2 0.6 0.3 0.2 0.6 0.6 ...
##  $ node3                 : num  0.3 0.3 0.4 0.3 0.3 0.4 0.2 0.2 0.4 0.4 ...
##  $ node4                 : num  0.2 0.3 0.3 0.4 0.2 0.3 0.2 0.2 0.3 0.2 ...
##  $ node5                 : num  0.2 0.3 0.2 0.2 0.3 0.3 0.2 0.3 0.2 0.2 ...
##  $ leaf_length1          : num  22.1 25.2 16.4 21 13 32.4 23 16.5 26 26.5 ...
##  $ leaf_length2          : num  18.9 17.6 9.8 18 13.6 22.1 12.3 21.1 21.1 18.1 ...
##  $ leaf_length3          : num  14.5 22 14.1 8.5 9.6 25 16.1 22.7 20.2 25.7 ...
##  $ leaf_length4          : num  6.4 14.9 9.4 17 NA 27.3 9.3 8.7 10.2 11.3 ...
##  $ leaf_length5          : num  13.3 8 NA 16.5 NA 12.4 15.5 15.5 17.5 22.7 ...
##  $ leaf_length6          : num  NA 3.6 NA NA NA 9.2 NA NA NA NA ...
##  $ leaf_length7          : num  NA NA NA NA NA 6 NA NA NA NA ...
##  $ leaf_length8          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_length9          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_length10         : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_width1           : num  0.3 0.3 0.2 0.3 0.2 0.4 0.2 0.2 0.3 0.4 ...
##  $ leaf_width2           : num  0.2 0.3 0.2 0.2 0.2 0.4 0.2 0.2 0.2 0.3 ...
##  $ leaf_width3           : num  0.2 0.3 0.2 0.2 0.2 0.4 0.2 0.2 0.3 0.3 ...
##  $ leaf_width4           : num  0.2 0.3 0.2 0.2 NA 0.4 0.2 0.2 0.3 0.3 ...
##  $ leaf_width5           : num  0.2 0.3 NA 0.2 NA 0.2 0.3 0.3 0.2 0.4 ...
##  $ leaf_width6           : num  NA 0.2 NA NA NA 0.2 NA NA NA NA ...
##  $ leaf_width7           : num  NA NA NA NA NA 0.3 NA NA NA NA ...
##  $ leaf_width8           : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_width9           : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_width10          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ pad_mass_g            : num  0.543 0.529 0.536 0.491 0.592 ...
##  $ pad_epiphyte_mass_g   : num  0.549 0.53 0.543 0.494 0.596 ...
##  $ rhi_mass_fw           : num  0.227 0.488 0.346 0.397 0.455 ...
##  $ rhi_foil              : num  0.658 0.427 0.45 0.376 0.55 ...
##  $ rhi_foil_dw_g         : num  0.684 0.478 0.49 0.411 0.586 ...
##  $ shoot_mass_fw         : num  0.269 0.412 0.165 0.321 0.116 ...
##  $ shoot_foil            : num  0.905 0.728 0.758 0.611 0.708 ...
##  $ shoot_foil_dw         : num  0.944 0.791 0.783 0.66 0.724 ...
##  $ xs_shoot_mass_fw      : num  4.77 NA NA NA NA ...
##  $ xs_shoot_foil         : num  0.789 NA NA NA NA ...
##  $ xs_shoot_foil_dw      : num  1.38 NA NA NA NA ...
##  $ xs_pad_mass_g         : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ xs_epiphyte_pad_mass_g: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_count             : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_fw                : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_foil              : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_foil_dw           : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ gamm_amph_count       : int  3 NA NA NA NA 21 NA NA NA NA ...
##  $ gamm_amph_fw          : num  0.0059 NA NA NA NA 0.036 NA NA NA NA ...
##  $ gamm_amph_foil        : num  0.495 NA NA NA NA ...
##  $ gamm_amph_foil_dw     : num  0.497 NA NA NA NA ...
##  $ caprel_count          : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ caprel_fw             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ caprel_foil           : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ caprel_foil_dw        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_count          : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_fw             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_foil           : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_foil_dw        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ gastropod_count       : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ gastropod_fw          : Factor w/ 80 levels "","0.0011","0.0012",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ gastropod_foil        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ gastropod_foil_dw     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_count            : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_fw               : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_foil             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_foil_dw          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ polychaete_count      : int  6 NA NA NA NA 7 NA NA NA NA ...
##  $ other_animal_notes    : Factor w/ 33 levels "","\"crab\" is tellmesus",..: 1 1 1 1 1 12 1 1 1 1 ...
##  $ notes                 : Factor w/ 5 levels "","caprel insignificant weight",..: 1 1 1 1 1 1 1 1 1 1 ...
eel <- eelgrass %>%
  mutate(shoot_dw = shoot_foil_dw-shoot_foil)  # calculate dry weight
# determin max leaf length
eel2 <- eel %>% 
  rowwise() %>%
  mutate(max_length = max(leaf_length1, leaf_length2, leaf_length3, leaf_length4, leaf_length5, leaf_length6, leaf_length7, leaf_length8, leaf_length9, leaf_length10, na.rm=TRUE)) %>%
  mutate(mean_length = mean(leaf_length1, leaf_length2, leaf_length3, leaf_length4, leaf_length5, leaf_length6, leaf_length7, leaf_length8, leaf_length9, leaf_length10, na.rm=TRUE))
## Warning in max(leaf_length1, leaf_length2, leaf_length3, leaf_length4,
## leaf_length5, : no non-missing arguments to max; returning -Inf

## Warning in max(leaf_length1, leaf_length2, leaf_length3, leaf_length4,
## leaf_length5, : no non-missing arguments to max; returning -Inf

## Warning in max(leaf_length1, leaf_length2, leaf_length3, leaf_length4,
## leaf_length5, : no non-missing arguments to max; returning -Inf

## Warning in max(leaf_length1, leaf_length2, leaf_length3, leaf_length4,
## leaf_length5, : no non-missing arguments to max; returning -Inf

## Warning in max(leaf_length1, leaf_length2, leaf_length3, leaf_length4,
## leaf_length5, : no non-missing arguments to max; returning -Inf

## Warning in max(leaf_length1, leaf_length2, leaf_length3, leaf_length4,
## leaf_length5, : no non-missing arguments to max; returning -Inf

## Warning in max(leaf_length1, leaf_length2, leaf_length3, leaf_length4,
## leaf_length5, : no non-missing arguments to max; returning -Inf
str(eel2)
## Classes 'rowwise_df', 'tbl_df', 'tbl' and 'data.frame':  840 obs. of  74 variables:
##  $ site                  : Factor w/ 21 levels "2017_H_01","2017_H_02",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ collection_date       : Factor w/ 18 levels "20/8/17","21/8/17",..: 18 18 18 18 18 18 18 18 18 18 ...
##  $ YYYYMMDD              : int  20170429 20170429 20170429 20170429 20170429 20170429 20170429 20170429 20170429 20170429 ...
##  $ quadrat               : int  1 1 1 1 1 2 2 2 2 2 ...
##  $ plant                 : int  1 2 3 4 5 1 2 3 4 5 ...
##  $ rhi_length            : num  5 5 5 4.6 5 4.6 4 5 5 3.6 ...
##  $ node1                 : num  0.5 0.7 0.3 0.4 0.2 1.2 0.3 0.3 0.5 0.4 ...
##  $ node2                 : num  0.4 0.4 0.3 0.4 0.2 0.6 0.3 0.2 0.6 0.6 ...
##  $ node3                 : num  0.3 0.3 0.4 0.3 0.3 0.4 0.2 0.2 0.4 0.4 ...
##  $ node4                 : num  0.2 0.3 0.3 0.4 0.2 0.3 0.2 0.2 0.3 0.2 ...
##  $ node5                 : num  0.2 0.3 0.2 0.2 0.3 0.3 0.2 0.3 0.2 0.2 ...
##  $ leaf_length1          : num  22.1 25.2 16.4 21 13 32.4 23 16.5 26 26.5 ...
##  $ leaf_length2          : num  18.9 17.6 9.8 18 13.6 22.1 12.3 21.1 21.1 18.1 ...
##  $ leaf_length3          : num  14.5 22 14.1 8.5 9.6 25 16.1 22.7 20.2 25.7 ...
##  $ leaf_length4          : num  6.4 14.9 9.4 17 NA 27.3 9.3 8.7 10.2 11.3 ...
##  $ leaf_length5          : num  13.3 8 NA 16.5 NA 12.4 15.5 15.5 17.5 22.7 ...
##  $ leaf_length6          : num  NA 3.6 NA NA NA 9.2 NA NA NA NA ...
##  $ leaf_length7          : num  NA NA NA NA NA 6 NA NA NA NA ...
##  $ leaf_length8          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_length9          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_length10         : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_width1           : num  0.3 0.3 0.2 0.3 0.2 0.4 0.2 0.2 0.3 0.4 ...
##  $ leaf_width2           : num  0.2 0.3 0.2 0.2 0.2 0.4 0.2 0.2 0.2 0.3 ...
##  $ leaf_width3           : num  0.2 0.3 0.2 0.2 0.2 0.4 0.2 0.2 0.3 0.3 ...
##  $ leaf_width4           : num  0.2 0.3 0.2 0.2 NA 0.4 0.2 0.2 0.3 0.3 ...
##  $ leaf_width5           : num  0.2 0.3 NA 0.2 NA 0.2 0.3 0.3 0.2 0.4 ...
##  $ leaf_width6           : num  NA 0.2 NA NA NA 0.2 NA NA NA NA ...
##  $ leaf_width7           : num  NA NA NA NA NA 0.3 NA NA NA NA ...
##  $ leaf_width8           : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_width9           : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_width10          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ pad_mass_g            : num  0.543 0.529 0.536 0.491 0.592 ...
##  $ pad_epiphyte_mass_g   : num  0.549 0.53 0.543 0.494 0.596 ...
##  $ rhi_mass_fw           : num  0.227 0.488 0.346 0.397 0.455 ...
##  $ rhi_foil              : num  0.658 0.427 0.45 0.376 0.55 ...
##  $ rhi_foil_dw_g         : num  0.684 0.478 0.49 0.411 0.586 ...
##  $ shoot_mass_fw         : num  0.269 0.412 0.165 0.321 0.116 ...
##  $ shoot_foil            : num  0.905 0.728 0.758 0.611 0.708 ...
##  $ shoot_foil_dw         : num  0.944 0.791 0.783 0.66 0.724 ...
##  $ xs_shoot_mass_fw      : num  4.77 NA NA NA NA ...
##  $ xs_shoot_foil         : num  0.789 NA NA NA NA ...
##  $ xs_shoot_foil_dw      : num  1.38 NA NA NA NA ...
##  $ xs_pad_mass_g         : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ xs_epiphyte_pad_mass_g: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_count             : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_fw                : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_foil              : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_foil_dw           : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ gamm_amph_count       : int  3 NA NA NA NA 21 NA NA NA NA ...
##  $ gamm_amph_fw          : num  0.0059 NA NA NA NA 0.036 NA NA NA NA ...
##  $ gamm_amph_foil        : num  0.495 NA NA NA NA ...
##  $ gamm_amph_foil_dw     : num  0.497 NA NA NA NA ...
##  $ caprel_count          : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ caprel_fw             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ caprel_foil           : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ caprel_foil_dw        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_count          : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_fw             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_foil           : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_foil_dw        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ gastropod_count       : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ gastropod_fw          : Factor w/ 80 levels "","0.0011","0.0012",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ gastropod_foil        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ gastropod_foil_dw     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_count            : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_fw               : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_foil             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_foil_dw          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ polychaete_count      : int  6 NA NA NA NA 7 NA NA NA NA ...
##  $ other_animal_notes    : Factor w/ 33 levels "","\"crab\" is tellmesus",..: 1 1 1 1 1 12 1 1 1 1 ...
##  $ notes                 : Factor w/ 5 levels "","caprel insignificant weight",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ shoot_dw              : num  0.039 0.0629 0.0247 0.0489 0.0167 ...
##  $ max_length            : num  22.1 25.2 16.4 21 13.6 32.4 23 22.7 26 26.5 ...
##  $ mean_length           : num  22.1 25.2 16.4 21 13 32.4 23 16.5 26 26.5 ...
# subset only the data that was collected before July (the main part of the growing season)

eel_sub <- eel2 %>%
  filter(YYYYMMDD < 20170701)
str(eel_sub)
## Classes 'rowwise_df', 'tbl_df', 'tbl' and 'data.frame':  520 obs. of  74 variables:
##  $ site                  : Factor w/ 21 levels "2017_H_01","2017_H_02",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ collection_date       : Factor w/ 18 levels "20/8/17","21/8/17",..: 18 18 18 18 18 18 18 18 18 18 ...
##  $ YYYYMMDD              : int  20170429 20170429 20170429 20170429 20170429 20170429 20170429 20170429 20170429 20170429 ...
##  $ quadrat               : int  1 1 1 1 1 2 2 2 2 2 ...
##  $ plant                 : int  1 2 3 4 5 1 2 3 4 5 ...
##  $ rhi_length            : num  5 5 5 4.6 5 4.6 4 5 5 3.6 ...
##  $ node1                 : num  0.5 0.7 0.3 0.4 0.2 1.2 0.3 0.3 0.5 0.4 ...
##  $ node2                 : num  0.4 0.4 0.3 0.4 0.2 0.6 0.3 0.2 0.6 0.6 ...
##  $ node3                 : num  0.3 0.3 0.4 0.3 0.3 0.4 0.2 0.2 0.4 0.4 ...
##  $ node4                 : num  0.2 0.3 0.3 0.4 0.2 0.3 0.2 0.2 0.3 0.2 ...
##  $ node5                 : num  0.2 0.3 0.2 0.2 0.3 0.3 0.2 0.3 0.2 0.2 ...
##  $ leaf_length1          : num  22.1 25.2 16.4 21 13 32.4 23 16.5 26 26.5 ...
##  $ leaf_length2          : num  18.9 17.6 9.8 18 13.6 22.1 12.3 21.1 21.1 18.1 ...
##  $ leaf_length3          : num  14.5 22 14.1 8.5 9.6 25 16.1 22.7 20.2 25.7 ...
##  $ leaf_length4          : num  6.4 14.9 9.4 17 NA 27.3 9.3 8.7 10.2 11.3 ...
##  $ leaf_length5          : num  13.3 8 NA 16.5 NA 12.4 15.5 15.5 17.5 22.7 ...
##  $ leaf_length6          : num  NA 3.6 NA NA NA 9.2 NA NA NA NA ...
##  $ leaf_length7          : num  NA NA NA NA NA 6 NA NA NA NA ...
##  $ leaf_length8          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_length9          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_length10         : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_width1           : num  0.3 0.3 0.2 0.3 0.2 0.4 0.2 0.2 0.3 0.4 ...
##  $ leaf_width2           : num  0.2 0.3 0.2 0.2 0.2 0.4 0.2 0.2 0.2 0.3 ...
##  $ leaf_width3           : num  0.2 0.3 0.2 0.2 0.2 0.4 0.2 0.2 0.3 0.3 ...
##  $ leaf_width4           : num  0.2 0.3 0.2 0.2 NA 0.4 0.2 0.2 0.3 0.3 ...
##  $ leaf_width5           : num  0.2 0.3 NA 0.2 NA 0.2 0.3 0.3 0.2 0.4 ...
##  $ leaf_width6           : num  NA 0.2 NA NA NA 0.2 NA NA NA NA ...
##  $ leaf_width7           : num  NA NA NA NA NA 0.3 NA NA NA NA ...
##  $ leaf_width8           : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_width9           : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ leaf_width10          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ pad_mass_g            : num  0.543 0.529 0.536 0.491 0.592 ...
##  $ pad_epiphyte_mass_g   : num  0.549 0.53 0.543 0.494 0.596 ...
##  $ rhi_mass_fw           : num  0.227 0.488 0.346 0.397 0.455 ...
##  $ rhi_foil              : num  0.658 0.427 0.45 0.376 0.55 ...
##  $ rhi_foil_dw_g         : num  0.684 0.478 0.49 0.411 0.586 ...
##  $ shoot_mass_fw         : num  0.269 0.412 0.165 0.321 0.116 ...
##  $ shoot_foil            : num  0.905 0.728 0.758 0.611 0.708 ...
##  $ shoot_foil_dw         : num  0.944 0.791 0.783 0.66 0.724 ...
##  $ xs_shoot_mass_fw      : num  4.77 NA NA NA NA ...
##  $ xs_shoot_foil         : num  0.789 NA NA NA NA ...
##  $ xs_shoot_foil_dw      : num  1.38 NA NA NA NA ...
##  $ xs_pad_mass_g         : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ xs_epiphyte_pad_mass_g: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_count             : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_fw                : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_foil              : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_foil_dw           : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ gamm_amph_count       : int  3 NA NA NA NA 21 NA NA NA NA ...
##  $ gamm_amph_fw          : num  0.0059 NA NA NA NA 0.036 NA NA NA NA ...
##  $ gamm_amph_foil        : num  0.495 NA NA NA NA ...
##  $ gamm_amph_foil_dw     : num  0.497 NA NA NA NA ...
##  $ caprel_count          : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ caprel_fw             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ caprel_foil           : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ caprel_foil_dw        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_count          : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_fw             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_foil           : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ limpet_foil_dw        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ gastropod_count       : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ gastropod_fw          : Factor w/ 80 levels "","0.0011","0.0012",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ gastropod_foil        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ gastropod_foil_dw     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_count            : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_fw               : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_foil             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ crab_foil_dw          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ polychaete_count      : int  6 NA NA NA NA 7 NA NA NA NA ...
##  $ other_animal_notes    : Factor w/ 33 levels "","\"crab\" is tellmesus",..: 1 1 1 1 1 12 1 1 1 1 ...
##  $ notes                 : Factor w/ 5 levels "","caprel insignificant weight",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ shoot_dw              : num  0.039 0.0629 0.0247 0.0489 0.0167 ...
##  $ max_length            : num  22.1 25.2 16.4 21 13.6 32.4 23 22.7 26 26.5 ...
##  $ mean_length           : num  22.1 25.2 16.4 21 13 32.4 23 16.5 26 26.5 ...
levels(as.factor(eel_sub$YYYYMMDD))
##  [1] "20170429" "20170524" "20170525" "20170528" "20170622" "20170623"
##  [7] "20170624" "20170625" "20170627" "20170628"
summary(eel_sub)
##         site     collection_date    YYYYMMDD           quadrat    
##  2017_H_01: 40   29/4/17:120     Min.   :20170429   Min.   :1.00  
##  2017_H_02: 40   25/5/17: 80     1st Qu.:20170524   1st Qu.:2.75  
##  2017_H_03: 40   22/6/17: 40     Median :20170528   Median :4.50  
##  2017_H_04: 40   23/6/17: 40     Mean   :20170549   Mean   :4.50  
##  2017_L_01: 40   24/5/17: 40     3rd Qu.:20170624   3rd Qu.:6.25  
##  2017_L_02: 40   24/6/17: 40     Max.   :20170628   Max.   :8.00  
##  (Other)  :280   (Other):160                                      
##      plant     rhi_length        node1           node2      
##  Min.   :1   Min.   :0.800   Min.   :0.100   Min.   :0.100  
##  1st Qu.:2   1st Qu.:4.300   1st Qu.:0.600   1st Qu.:0.600  
##  Median :3   Median :5.000   Median :0.900   Median :1.100  
##  Mean   :3   Mean   :4.439   Mean   :1.018   Mean   :1.196  
##  3rd Qu.:4   3rd Qu.:5.000   3rd Qu.:1.300   3rd Qu.:1.600  
##  Max.   :5   Max.   :5.000   Max.   :3.000   Max.   :3.600  
##              NA's   :7       NA's   :7       NA's   :19     
##      node3            node4            node5         leaf_length1   
##  Min.   :0.1000   Min.   :0.1000   Min.   :0.1000   Min.   :  2.30  
##  1st Qu.:0.4000   1st Qu.:0.3000   1st Qu.:0.2000   1st Qu.: 19.00  
##  Median :0.7000   Median :0.5000   Median :0.3000   Median : 28.40  
##  Mean   :0.8411   Mean   :0.5618   Mean   :0.3634   Mean   : 33.15  
##  3rd Qu.:1.2000   3rd Qu.:0.7000   3rd Qu.:0.5000   3rd Qu.: 42.40  
##  Max.   :2.7000   Max.   :8.0000   Max.   :1.5000   Max.   :105.20  
##  NA's   :116      NA's   :206      NA's   :293      NA's   :7       
##   leaf_length2     leaf_length3     leaf_length4    leaf_length5   
##  Min.   :  1.70   Min.   :  3.10   Min.   : 2.10   Min.   :  1.90  
##  1st Qu.: 18.30   1st Qu.: 14.45   1st Qu.:15.55   1st Qu.: 15.65  
##  Median : 30.30   Median : 26.00   Median :28.00   Median : 26.40  
##  Mean   : 33.83   Mean   : 31.11   Mean   :31.02   Mean   : 29.54  
##  3rd Qu.: 46.20   3rd Qu.: 43.10   3rd Qu.:41.90   3rd Qu.: 39.25  
##  Max.   :110.50   Max.   :109.90   Max.   :96.00   Max.   :111.70  
##  NA's   :7        NA's   :8        NA's   :22      NA's   :117     
##   leaf_length6    leaf_length7    leaf_length8     leaf_length9  
##  Min.   : 2.10   Min.   : 1.30   Min.   : 2.600   Min.   : 3.00  
##  1st Qu.:12.53   1st Qu.:14.45   1st Qu.: 7.475   1st Qu.: 8.25  
##  Median :27.10   Median :25.00   Median :21.000   Median :14.05  
##  Mean   :30.06   Mean   :28.79   Mean   :27.190   Mean   :19.22  
##  3rd Qu.:40.35   3rd Qu.:41.15   3rd Qu.:36.900   3rd Qu.:28.02  
##  Max.   :94.20   Max.   :82.60   Max.   :97.200   Max.   :45.20  
##  NA's   :310     NA's   :433     NA's   :490      NA's   :514    
##  leaf_length10    leaf_width1      leaf_width2     leaf_width3    
##  Min.   :16.00   Min.   :0.1000   Min.   :0.100   Min.   :0.1000  
##  1st Qu.:18.18   1st Qu.:0.3000   1st Qu.:0.300   1st Qu.:0.3000  
##  Median :20.35   Median :0.4000   Median :0.400   Median :0.4000  
##  Mean   :20.35   Mean   :0.3739   Mean   :0.383   Mean   :0.3918  
##  3rd Qu.:22.52   3rd Qu.:0.5000   3rd Qu.:0.500   3rd Qu.:0.5000  
##  Max.   :24.70   Max.   :0.8000   Max.   :0.800   Max.   :0.8000  
##  NA's   :518     NA's   :7        NA's   :7       NA's   :8       
##   leaf_width4      leaf_width5     leaf_width6      leaf_width7    
##  Min.   :0.1000   Min.   :0.100   Min.   :0.1000   Min.   :0.2000  
##  1st Qu.:0.3000   1st Qu.:0.300   1st Qu.:0.3000   1st Qu.:0.3000  
##  Median :0.4000   Median :0.400   Median :0.4000   Median :0.4000  
##  Mean   :0.3946   Mean   :0.394   Mean   :0.3857   Mean   :0.3851  
##  3rd Qu.:0.5000   3rd Qu.:0.500   3rd Qu.:0.5000   3rd Qu.:0.4000  
##  Max.   :0.8000   Max.   :0.800   Max.   :0.7000   Max.   :0.7000  
##  NA's   :22       NA's   :117     NA's   :310      NA's   :433     
##   leaf_width8      leaf_width9      leaf_width10     pad_mass_g    
##  Min.   :0.2000   Min.   :0.2000   Min.   :0.300   Min.   :0.4728  
##  1st Qu.:0.3000   1st Qu.:0.3000   1st Qu.:0.325   1st Qu.:0.6298  
##  Median :0.4000   Median :0.3000   Median :0.350   Median :0.6655  
##  Mean   :0.3867   Mean   :0.3167   Mean   :0.350   Mean   :0.6603  
##  3rd Qu.:0.4000   3rd Qu.:0.3750   3rd Qu.:0.375   3rd Qu.:0.7017  
##  Max.   :0.6000   Max.   :0.4000   Max.   :0.400   Max.   :0.8519  
##  NA's   :490      NA's   :514      NA's   :518     NA's   :17      
##  pad_epiphyte_mass_g  rhi_mass_fw        rhi_foil      rhi_foil_dw_g   
##  Min.   :0.4806      Min.   :0.0353   Min.   :0.2876   Min.   :0.3297  
##  1st Qu.:0.6351      1st Qu.:0.3652   1st Qu.:0.7200   1st Qu.:0.7841  
##  Median :0.6735      Median :0.5143   Median :0.8724   Median :0.9502  
##  Mean   :0.6667      Mean   :0.5399   Mean   :0.9020   Mean   :0.9759  
##  3rd Qu.:0.7066      3rd Qu.:0.6996   3rd Qu.:1.1072   3rd Qu.:1.1889  
##  Max.   :0.9698      Max.   :1.4428   Max.   :1.4849   Max.   :1.6390  
##  NA's   :17          NA's   :7        NA's   :7        NA's   :7       
##  shoot_mass_fw      shoot_foil     shoot_foil_dw    xs_shoot_mass_fw 
##  Min.   :0.0293   Min.   :0.4336   Min.   :0.4864   Min.   : 0.0243  
##  1st Qu.:0.5250   1st Qu.:1.0716   1st Qu.:1.2224   1st Qu.: 2.9386  
##  Median :1.0255   Median :1.3160   Median :1.5332   Median : 6.2291  
##  Mean   :1.3357   Mean   :1.3830   Mean   :1.5803   Mean   :10.4281  
##  3rd Qu.:1.7271   3rd Qu.:1.7837   3rd Qu.:2.0987   3rd Qu.:13.7739  
##  Max.   :6.6599   Max.   :2.8118   Max.   :2.9439   Max.   :66.8455  
##  NA's   :7        NA's   :7        NA's   :7        NA's   :416      
##  xs_shoot_foil    xs_shoot_foil_dw  xs_pad_mass_g   
##  Min.   :0.5372   Min.   : 0.6509   Min.   :0.7107  
##  1st Qu.:1.1943   1st Qu.: 1.8682   1st Qu.:0.7107  
##  Median :1.5204   Median : 2.4118   Median :0.7107  
##  Mean   :1.8368   Mean   : 3.1024   Mean   :0.7107  
##  3rd Qu.:2.0028   3rd Qu.: 3.2686   3rd Qu.:0.7107  
##  Max.   :5.3852   Max.   :12.5068   Max.   :0.7107  
##  NA's   :416      NA's   :416       NA's   :519     
##  xs_epiphyte_pad_mass_g   iso_count         iso_fw          iso_foil     
##  Min.   :0.7444         Min.   :1.000   Min.   :0.0002   Min.   :0.3042  
##  1st Qu.:0.7444         1st Qu.:1.000   1st Qu.:0.0050   1st Qu.:0.4584  
##  Median :0.7444         Median :1.000   Median :0.1364   Median :0.5225  
##  Mean   :0.7444         Mean   :1.487   Mean   :0.1862   Mean   :0.5613  
##  3rd Qu.:0.7444         3rd Qu.:2.000   3rd Qu.:0.2212   3rd Qu.:0.6674  
##  Max.   :0.7444         Max.   :5.000   Max.   :1.2169   Max.   :0.9067  
##  NA's   :519            NA's   :481     NA's   :481      NA's   :481     
##   iso_foil_dw     gamm_amph_count   gamm_amph_fw    gamm_amph_foil  
##  Min.   :0.3756   Min.   : 1.000   Min.   :0.0004   Min.   :0.2773  
##  1st Qu.:0.4948   1st Qu.: 2.000   1st Qu.:0.0046   1st Qu.:0.4560  
##  Median :0.5599   Median : 3.000   Median :0.0075   Median :0.5098  
##  Mean   :0.6097   Mean   : 5.678   Mean   :0.0147   Mean   :0.5325  
##  3rd Qu.:0.7326   3rd Qu.: 8.000   3rd Qu.:0.0140   3rd Qu.:0.5708  
##  Max.   :0.9509   Max.   :33.000   Max.   :0.1460   Max.   :1.0428  
##  NA's   :481      NA's   :461      NA's   :462      NA's   :462     
##  gamm_amph_foil_dw  caprel_count      caprel_fw       caprel_foil    
##  Min.   :0.2779    Min.   : 1.000   Min.   :0.0003   Min.   :0.0001  
##  1st Qu.:0.4568    1st Qu.: 1.000   1st Qu.:0.0026   1st Qu.:0.4412  
##  Median :0.5100    Median : 2.000   Median :0.0074   Median :0.5155  
##  Mean   :0.5336    Mean   : 4.979   Mean   :0.0424   Mean   :0.5176  
##  3rd Qu.:0.5727    3rd Qu.: 5.000   3rd Qu.:0.0234   3rd Qu.:0.6414  
##  Max.   :1.0438    Max.   :28.000   Max.   :0.6490   Max.   :1.0819  
##  NA's   :462       NA's   :472      NA's   :476      NA's   :473     
##  caprel_foil_dw    limpet_count      limpet_fw       limpet_foil    
##  Min.   :0.0002   Min.   : 1.000   Min.   :0.0037   Min.   :0.2127  
##  1st Qu.:0.4434   1st Qu.: 2.000   1st Qu.:0.0216   1st Qu.:0.4171  
##  Median :0.5183   Median : 3.000   Median :0.0464   Median :0.5199  
##  Mean   :0.5195   Mean   : 4.181   Mean   :0.1183   Mean   :0.5230  
##  3rd Qu.:0.6454   3rd Qu.: 5.250   3rd Qu.:0.1445   3rd Qu.:0.6012  
##  Max.   :1.0860   Max.   :15.000   Max.   :0.8388   Max.   :1.0519  
##  NA's   :473      NA's   :448      NA's   :448      NA's   :448     
##  limpet_foil_dw   gastropod_count                     gastropod_fw
##  Min.   :0.2152   Min.   : 1.000                            :478  
##  1st Qu.:0.4538   1st Qu.: 1.250   0.0037                   :  2  
##  Median :0.5500   Median : 2.000   0.0056                   :  2  
##  Mean   :0.5587   Mean   : 3.405   limpet = Lottia parallela:  2  
##  3rd Qu.:0.6429   3rd Qu.: 4.000   0.0011                   :  1  
##  Max.   :1.0611   Max.   :16.000   0.0012                   :  1  
##  NA's   :448      NA's   :478      (Other)                  : 34  
##  gastropod_foil   gastropod_foil_dw   crab_count     crab_fw   
##  Min.   :0.0001   Min.   :0.0002    Min.   : NA   Min.   : NA  
##  1st Qu.:0.3992   1st Qu.:0.4186    1st Qu.: NA   1st Qu.: NA  
##  Median :0.5174   Median :0.5216    Median : NA   Median : NA  
##  Mean   :0.5256   Mean   :0.5394    Mean   :NaN   Mean   :NaN  
##  3rd Qu.:0.6185   3rd Qu.:0.6308    3rd Qu.: NA   3rd Qu.: NA  
##  Max.   :1.0419   Max.   :1.0514    Max.   : NA   Max.   : NA  
##  NA's   :478      NA's   :478       NA's   :520   NA's   :520  
##    crab_foil    crab_foil_dw polychaete_count  other_animal_notes
##  Min.   : NA   Min.   : NA   Min.   : 1.000             :490     
##  1st Qu.: NA   1st Qu.: NA   1st Qu.: 3.000   1 bivalve :  7     
##  Median : NA   Median : NA   Median : 4.000   2 bivalves:  4     
##  Mean   :NaN   Mean   :NaN   Mean   : 5.267   1 clam    :  3     
##  3rd Qu.: NA   3rd Qu.: NA   3rd Qu.: 6.000   1 barnacle:  2     
##  Max.   : NA   Max.   : NA   Max.   :35.000   3 bivalves:  2     
##  NA's   :520   NA's   :520   NA's   :419      (Other)   : 12     
##                                     notes        shoot_dw     
##                                        :517   Min.   :0.0042  
##  caprel insignificant weight           :  0   1st Qu.:0.0938  
##  caprel so small it was not measurable :  1   Median :0.1576  
##  FW missing from gastropod             :  1   Mean   :0.1973  
##  gastropod lost, DW unknow             :  1   3rd Qu.:0.2575  
##                                               Max.   :0.8045  
##                                               NA's   :7       
##    max_length      mean_length    
##  Min.   :  -Inf   Min.   :  2.30  
##  1st Qu.: 29.27   1st Qu.: 19.00  
##  Median : 43.40   Median : 28.40  
##  Mean   :  -Inf   Mean   : 33.15  
##  3rd Qu.: 57.23   3rd Qu.: 42.40  
##  Max.   :111.70   Max.   :105.20  
##                   NA's   :7
eel_sub2 <- na.omit(data.frame(eel_sub$shoot_dw, eel_sub$max_length, eel_sub$mean_length, eel_sub$shoot_mass_fw))
names(eel_sub2) <- c("shoot_dw", "max_length", "mean_length", "shoot_mass_fw")

Look at data

plot(eel_sub2$max_length, eel_sub2$shoot_dw)

# Looks like the variance in shoot dw might be increasing with the the length (mm) of the longest blade. 
plot(eel_sub2$mean_length, eel_sub2$shoot_dw)

plot(eel_sub2$max_length, eel_sub2$shoot_mass_fw)

# not as tight as a correlation
cor(eel_sub2$max_length, eel_sub2$shoot_dw, use = "complete.obs")
## [1] 0.8782586
cor(eel_sub2$mean_length, eel_sub2$shoot_dw, use = "complete.obs")
## [1] 0.66646
cor(eel_sub2$mean_length, eel_sub2$shoot_mass_fw, use = "complete.obs")
## [1] 0.6754984
# stronger positive linear relationship with max length than average length (makes sense)
par(mfrow=c(1,2))
boxplot(eel_sub2$max_length)
boxplot(eel_sub2$shoot_dw)

# Looks like both varibles may have some outliers, in particular the shoot dry weight. 

par(mfrow=c(1,2))
plot(density(eel_sub2$max_length), ylab = "Frequency")
plot(density(eel_sub2$shoot_dw, na.rm = T), ylab = "Frequency")

hist(eel_sub2$max_length)
# max length looks pretty normal with some slight right? skew
hist(eel_sub2$shoot_dw)

# heavily right skewed does log make it look better?
hist(log(eel_sub2$shoot_dw))
# skews it the other way... 

Linear models

data only before july

dat <- na.omit(data.frame(eel_sub2$shoot_dw, eel_sub2$max_length))
names(dat) <- c("dw", "max_length")

plot(dat$dw, dat$max_length)

# Lets fit an untransformed linear model 
fit.lm <- lm(dw ~ max_length, data = dat)
plot(fit.lm)

# looks like there might be some funky stuff happening with the residuals -- maybe an increase in variance along the fitted values. And the QQ plot looks real gross with some concavity (i.e. skewness) and some possible outliers. The cooks plot makes it look like there case numbers (171, 230, 498) could be influencial outliers. Some of this is consisten with the histogram of the data (see above)
summary(fit.lm)
## 
## Call:
## lm(formula = dw ~ max_length, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.20351 -0.04010 -0.00246  0.02434  0.52698 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0917116  0.0076619  -11.97   <2e-16 ***
## max_length   0.0062482  0.0001505   41.52   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07254 on 511 degrees of freedom
## Multiple R-squared:  0.7713, Adjusted R-squared:  0.7709 
## F-statistic:  1724 on 1 and 511 DF,  p-value: < 2.2e-16
AIC(fit.lm); BIC(fit.lm)
## [1] -1232.068
## [1] -1219.347
# looks like max length is significantly different than 0
# Has an adjusted r2 of 69%... pretty good for ecology. Model is significant, coefficients for the model are not equal to zero (p value =~0)
library(e1071)
skewness(dat$dw)
## [1] 1.654238
skewness(dat$max_length)
## [1] 0.8226692
# the response variable is skewed.... highly skewed

e <- residuals(fit.lm)
shapiro.test(e)
## 
##  Shapiro-Wilk normality test
## 
## data:  e
## W = 0.87574, p-value < 2.2e-16
n <- length(dat$dw)

ggplot(dat, aes(x = max_length, y = dw)) + geom_point() + theme_classic() +
  geom_smooth(method = "lm", se = TRUE)

# Does a boxcox indicate a transformation is necessary? 
library(MASS)
boxcox(fit.lm)

boxcox(fit.lm, lambda=seq(from=0, to=0.6, by=.01)) 

# Umm of like square root?
# Could also try a log



hist(dat$dw^0.5)

y.2 <- dat$y^(0.5)
# looks way better with a fourth root transformation, normal distribution
fit2 <- lm(dw^(0.5) ~ max_length, data = dat)
plot(fit2)

# looks a lot better, homoscedascity is good, qq plot tails are a little weird, still some outliers
summary(fit2)
## 
## Call:
## lm(formula = dw^(0.5) ~ max_length, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.19936 -0.04459 -0.00439  0.03367  0.41670 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1044604  0.0071964   14.52   <2e-16 ***
## max_length  0.0067145  0.0001414   47.50   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06813 on 511 degrees of freedom
## Multiple R-squared:  0.8154, Adjusted R-squared:  0.815 
## F-statistic:  2256 on 1 and 511 DF,  p-value: < 2.2e-16
# r squared is better, model is still significant
plot(dat$max_length, dat$dw^(0.5))

# Plot the square root transformed data
lm_eqn <- function(dat){
    m <- fit2;
    eq <- substitute(italic(sqrt(y)) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2, 
         list(a = format(unname(coef(m)[1]), digits = 2),
              b = format(unname(coef(m)[2]), digits = 2),
             r2 = format(summary(m)$r.squared, digits = 3)))
    as.character(as.expression(eq));
}
ggplot(dat, aes(x = max_length, y = dw^(0.5))) + geom_point() + geom_smooth(method = "lm", se = TRUE, level = 0.95) + geom_text(x = 25, y = 0.8, label = lm_eqn(dat), parse = TRUE, check_overlap = TRUE) + theme_classic()

# Compare to a logtransformed model (easier to understand)

fit3 <- lm(log(dw) ~ max_length, data = dat)
plot(fit3)

summary(fit3)
## 
## Call:
## lm(formula = log(dw) ~ max_length, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.26096 -0.21548  0.03841  0.26030  1.43709 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3.4469963  0.0443016  -77.81   <2e-16 ***
## max_length   0.0331391  0.0008702   38.08   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4194 on 511 degrees of freedom
## Multiple R-squared:  0.7395, Adjusted R-squared:  0.739 
## F-statistic:  1450 on 1 and 511 DF,  p-value: < 2.2e-16
hist(log(dat$dw))

plot(dat$max_length, log(dat$dw))

# log-log transformation
lm2 <- lm(log(shoot_dw) ~ log(max_length), data = eel_sub2)
summary(lm2)
## 
## Call:
## lm(formula = log(shoot_dw) ~ log(max_length), data = eel_sub2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0858 -0.2056  0.0128  0.1736  1.2739 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -7.78423    0.11222  -69.37   <2e-16 ***
## log(max_length)  1.57549    0.02987   52.74   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3237 on 511 degrees of freedom
## Multiple R-squared:  0.8448, Adjusted R-squared:  0.8445 
## F-statistic:  2781 on 1 and 511 DF,  p-value: < 2.2e-16
plot(lm2)

# plot the log-log transformed data with linear model 
lm_eqn <- function(dat){
    m <- lm2;
    eq <- substitute(italic(log(y)) == a + b %.% italic(log(x))*","~~italic(r)^2~"="~r2, 
         list(a = format(unname(coef(m)[1]), digits = 2),
              b = format(unname(coef(m)[2]), digits = 2),
             r2 = format(summary(m)$r.squared, digits = 3)))
    as.character(as.expression(eq));
}
ggplot(dat, aes(x = log(max_length), y = log(dw))) + geom_point() + geom_smooth(method = "lm", se = TRUE, level = 0.95) + geom_text(x = 2.5, y = -0.5, label = lm_eqn(dat), parse = TRUE, check_overlap = TRUE) + 
  theme_classic()

visreg(lm2, gg = T)

Use selected linear model to predict 2019 biomass data

Import 2019 data

The data for 2019: is set up in a 3 sheet excel spreadsheets. quadrat_ID in hab_qrt is unique for every quadrat that was done in summer 2019. Use the quadrat_ID to connect to the other sheets (hab_lng and hab_wgt). Hab_lng has all the individual lengths for all measured plants (in mm) (for eelgrass its 15 blades from each quadrat/site. For kelp its up to 3 of each species collected from the site). Hab_wgt has the biomass weights for individual species biomass by bag

For this purpose we are interested in only the length data but need to use the quadrat sheet to make sure we only are looking at the eelgrass sites newest version of data is : RAW_10-22-19

hab_qrt <- read_excel("../ALL_DATA/Lia_fish/Habitat_2019_RAW_10-23-19.xlsx", sheet = 1)
hab_lng <- read_excel("../ALL_DATA/Lia_fish/Habitat_2019_RAW_10-23-19.xlsx", sheet = 2)
hab_wgt <- read_excel("../ALL_DATA/Lia_fish/Habitat_2019_RAW_10-23-19.xlsx", sheet = 3)

Data cleaning

We want only the eelgrass sites and need to convert the length measurements (in mm) to cm

glimpse(hab_qrt)
## Observations: 125
## Variables: 10
## $ quadrat_ID       <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, …
## $ site             <chr> "Shakan - kelp", "Shakan - kelp", "Shakan - kel…
## $ date             <dttm> 2019-06-08, 2019-06-08, 2019-06-08, 2019-06-08…
## $ YYYYMMDD         <dbl> 20190608, 20190608, 20190608, 20190608, 2019060…
## $ habitat          <chr> "kelp", "kelp", "kelp", "kelp", "kelp", "seagra…
## $ quadrat          <dbl> 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1,…
## $ total_biomass    <chr> "620.9", "230.4", "869.4", "716.0", "748.7", "N…
## $ density          <chr> "NA", "NA", "NA", "NA", "NA", "29.0", "18.0", "…
## $ flowering_shoots <chr> "NA", "NA", "NA", "NA", "NA", "0.0", "0.0", "0.…
## $ notes            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
# Want to only extract the quadrat numbers that were used to sample eelgrass sites
eel_qrts <- na.omit(ifelse(hab_qrt$habitat == "seagrass", paste(hab_qrt$quadrat_ID), NA))
# subset the length data by quadrat at an eelgrass site
lng_sub <- subset(hab_lng, quadrat_ID %in% eel_qrts)
levels(as.factor(lng_sub$species)) # only Z. marina, awesome! 
## [1] "Zostera marina"
lng_sub <- lng_sub %>%
  mutate(length_cm = length/100)

Predict based on linear models

summary(lm2)
## 
## Call:
## lm(formula = log(shoot_dw) ~ log(max_length), data = eel_sub2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0858 -0.2056  0.0128  0.1736  1.2739 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -7.78423    0.11222  -69.37   <2e-16 ***
## log(max_length)  1.57549    0.02987   52.74   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3237 on 511 degrees of freedom
## Multiple R-squared:  0.8448, Adjusted R-squared:  0.8445 
## F-statistic:  2781 on 1 and 511 DF,  p-value: < 2.2e-16
# formula :
#   log (y) = beta0 + beta1 * log (x) 
#   log (dw) = -7.78 + 1.57 * log (max_length) 
# create a new dataframe with the leaf lengths from 2019
newx <- data.frame(max_length = as.numeric(lng_sub$length_cm))
# use the log-log linear model (equation above) to calculate the dry mass of the shoot with 95% CI 
# make sure to exp() the values because its a log-log relationship
pr.lm <- exp(predict(lm2, newdata = newx, interval = "confidence", level = 0.95))

# graph these new predicted values
# create data.frame first
newdata <- cbind(lng_sub, pr.lm)
ggplot(newdata, aes(x = length_cm, y = fit)) + geom_point() + theme_classic() + geom_smooth(method = "lm")

Estimate total biomass

str(newdata)
## 'data.frame':    1503 obs. of  8 variables:
##  $ plant_ID  : num  27 28 29 30 31 32 33 34 35 36 ...
##  $ quadrat_ID: num  6 6 6 6 6 6 6 6 6 6 ...
##  $ species   : chr  "Zostera marina" "Zostera marina" "Zostera marina" "Zostera marina" ...
##  $ length    : num  579 835 375 334 691 323 581 418 852 544 ...
##  $ length_cm : num  5.79 8.35 3.75 3.34 6.91 3.23 5.81 4.18 8.52 5.44 ...
##  $ fit       : num  0.00662 0.01179 0.00334 0.00278 0.00875 ...
##  $ lwr       : num  0.00588 0.01069 0.00289 0.00239 0.00785 ...
##  $ upr       : num  0.00746 0.01301 0.00386 0.00324 0.00976 ...
str(hab_qrt)
## Classes 'tbl_df', 'tbl' and 'data.frame':    125 obs. of  10 variables:
##  $ quadrat_ID      : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ site            : chr  "Shakan - kelp" "Shakan - kelp" "Shakan - kelp" "Shakan - kelp" ...
##  $ date            : POSIXct, format: "2019-06-08" "2019-06-08" ...
##  $ YYYYMMDD        : num  20190608 20190608 20190608 20190608 20190608 ...
##  $ habitat         : chr  "kelp" "kelp" "kelp" "kelp" ...
##  $ quadrat         : num  1 2 3 4 5 1 2 3 4 5 ...
##  $ total_biomass   : chr  "620.9" "230.4" "869.4" "716.0" ...
##  $ density         : chr  "NA" "NA" "NA" "NA" ...
##  $ flowering_shoots: chr  "NA" "NA" "NA" "NA" ...
##  $ notes           : chr  NA NA NA NA ...
hab_qrt$density <- as.numeric(hab_qrt$density)
## Warning: NAs introduced by coercion
hab_qrt$flowering_shoots <- as.numeric(hab_qrt$flowering_shoots)
## Warning: NAs introduced by coercion
# Want to only extract the quadrat numbers that were used to sample eelgrass sites
dsty <- hab_qrt %>%
  filter(habitat == 'seagrass') %>%
  mutate(density_m2 = (density)*4) %>%
  mutate(flowering_m2 = (flowering_shoots)*4)
dsty <- dsty[,c(1,8,9,11,12)]

# calculate average biomass for 15 shoots for each quadrat
require(dplyr)
df <- newdata %>%
  group_by(quadrat_ID) %>%
  mutate(avg_biomass = mean(fit))
df1 <- df %>%
  dplyr::select(quadrat_ID, avg_biomass) %>%
  distinct()

# need to add back in site information

df2 <- left_join(df1, dsty, by = "quadrat_ID")
df3 <- left_join(df2, hab_qrt[,1:2], by = "quadrat_ID")
write.csv(df3, "../ALL_DATA/seagrass_biomass_conversions.csv")